@ Jul 2017
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import statsmodels
import statsmodels.api as sm
In [2]:
# Pair under study; the plot titles in this notebook call them Pepsi and Coca-Cola.
tickers = ['PEP', 'KO']

# Daily closing prices for the window that is later labelled 'in-sample'.
# NOTE(review): `get_pricing` and `symbols` are not imported anywhere in this
# notebook; presumably they are built-ins of the hosting research
# environment -- confirm before running elsewhere.
data = get_pricing(
    symbols(tickers),
    start_date='2006-01-01',
    end_date='2008-08-01',
    fields='close_price',
    frequency='daily'
)
data.head()
Out[2]:
In [3]:
# Inspect the raw column labels; the following cell reads `.symbol` from each
# of them and replaces the labels with those values.
data.columns
Out[3]:
In [4]:
# Replace each column label with its `.symbol` value so that columns can be
# selected with the strings in `tickers`.
# `getattr` falls back to the label itself when it has no `.symbol`, so
# re-running this cell after the labels are already plain strings is a no-op
# instead of an AttributeError.
data.columns = [getattr(column, 'symbol', column) for column in data.columns]
data.index.name = 'Date'
data.head()
Out[4]:
In [5]:
# Compound the period-over-period percentage changes into a growth curve
# (the first row is NaN because it has no previous price to compare against).
cumm_rtn = data.pct_change().add(1).cumprod()

# Label through the Axes object returned by the DataFrame plot.
ax = cumm_rtn.plot()
ax.set_ylabel('Cummulative Returns')
ax.set_xlabel('Time')
ax.set_title('Cummulative Plot of Pepsi and Coca-Cola');
In [6]:
# Scatter one price series against the other, colouring each point by its
# position in time (early rows map to the low end of the colormap).
colors = np.linspace(0.1, 1, len(data))
sc = plt.scatter(data[tickers[0]], data[tickers[1]], s=30,
                 c=colors, cmap=plt.get_cmap('jet'), edgecolor='k', alpha=0.7)

# Pin the colorbar ticks to up to ten evenly spaced rows and label each tick
# with that row's date. Setting tick labels without fixing the tick
# positions would pair the dates with whatever ticks Matplotlib happens to
# choose, so labels could be mismatched in number or position.
tick_rows = np.linspace(0, len(data) - 1, min(10, len(data))).astype(int)
cb = plt.colorbar(sc, ticks=colors[tick_rows])
cb.ax.set_yticklabels([str(stamp.date()) for stamp in data.index[tick_rows]])
plt.xlabel(tickers[0])
plt.ylabel(tickers[1]);
Linear Regression in vector form
$$y = x\beta + \varepsilon$$ where,
\begin{equation*} y = \begin{pmatrix} y_1 \\ y_2 \\ \vdots \\ y_n \end{pmatrix} \end{equation*}\begin{equation*} x = \begin{pmatrix} x_1^T \\ x_2^T \\ \vdots \\ x_n^T \end{pmatrix} \end{equation*}\begin{equation*} \beta = \begin{pmatrix} \beta_0 \\ \beta_1 \\ \vdots \\ \beta_p \end{pmatrix} \end{equation*}\begin{equation*} \varepsilon = \begin{pmatrix} \varepsilon_1 \\ \varepsilon_2 \\ \vdots \\ \varepsilon_n \end{pmatrix} \end{equation*}Spread
$$ \begin{align*} \varepsilon & = y - \begin{bmatrix} \beta_0 & \beta_1 \end{bmatrix} \begin{bmatrix} 1 \\ x \end{bmatrix} \\ \\ & = p^{KO} - \begin{bmatrix} \beta_0 & \beta_1 \end{bmatrix} \begin{bmatrix} 1 \\ p^{PEP} \end{bmatrix} \\ \\ & = p^{KO} - \beta_0 - \beta_1 p^{PEP} \end{align*} $$where,
$p = \text{Price}$
In [7]:
# Design matrix: the tickers[0] price column followed by a constant column
# (prepend=False appends the constant), so the fitted parameters are ordered
# [slope, intercept].
X = sm.add_constant(data[tickers[0]], prepend=False)

# Ordinary least squares of the tickers[1] price on that design matrix.
ols = sm.OLS(endog=data[tickers[1]], exog=X).fit()
beta = ols.params

# Fitted values at the smallest and largest regressor value; these two points
# are enough to draw the regression line.
y_fit = [edge.dot(beta) for edge in (X.min(), X.max())]
In [8]:
# Print the summary report of the fitted OLS model.
print(ols.summary2())
In [9]:
# Same time-coloured scatter as before, now with the fitted regression line.
colors = np.linspace(0.1, 1, len(data))
sc = plt.scatter(data[tickers[0]], data[tickers[1]], s=30,
                 c=colors, cmap=plt.get_cmap('jet'), edgecolor='k', alpha=0.7)

# The first column of X is the price regressor; `.iloc[0]` selects it by
# position explicitly, because `[0]` on a label-indexed Series relies on a
# deprecated positional fallback.
x_ends = [X.min().iloc[0], X.max().iloc[0]]
plt.plot(x_ends, y_fit, '--b', linewidth=3, label='Linear Regression Fit')
plt.legend()

# Pin the colorbar ticks to up to ten evenly spaced rows and label each tick
# with that row's date, so labels and ticks always agree in number and
# position.
tick_rows = np.linspace(0, len(data) - 1, min(10, len(data))).astype(int)
cb = plt.colorbar(sc, ticks=colors[tick_rows])
cb.ax.set_yticklabels([str(stamp.date()) for stamp in data.index[tick_rows]])
plt.xlabel(tickers[0])
plt.ylabel(tickers[1]);
The fit looks this good because it is in-sample: the line is drawn over the same data that was used to estimate it.
In [10]:
from sklearn.linear_model import LinearRegression
In [11]:
# Second implementation of the same regression, this time with scikit-learn;
# fit_intercept=True asks the model to estimate an intercept as well as the
# slope.
reg = LinearRegression(fit_intercept=True)
reg
Out[11]:
In [12]:
# scikit-learn expects a 2-D feature matrix, so turn the single price column
# into shape (n_samples, 1). Reshape the underlying NumPy array via `.values`:
# `Series.reshape` was deprecated and is no longer available in newer pandas.
reg.fit(data[tickers[0]].values.reshape(-1, 1), data[tickers[1]])
Out[12]:
In [13]:
# Slope and intercept from scikit-learn; both fits regress the same columns
# with an intercept, so these are expected to agree with `beta`.
print(reg.coef_)
print(reg.intercept_)
In [14]:
# Spread = residual of the fitted relation:
#     price(tickers[1]) - slope * price(tickers[0]) - intercept
# `beta` is ordered [slope, intercept] because the constant column was
# appended after the price column. It is a label-indexed Series, so use
# `.iloc` for positional access rather than the deprecated positional
# fallback of `beta[0]` / `beta[1]`.
spread = pd.DataFrame(data[tickers[1]] - beta.iloc[0] * data[tickers[0]] - beta.iloc[1])
In [15]:
# Name the single spread column so it can be told apart from the
# 'out-of-sample' column added to the extended frame later on.
spread.columns = ['in-sample']
spread.head()
Out[15]:
In [16]:
# Plot the in-sample spread over time, labelling through the returned Axes.
ax = spread.plot()
ax.set_xlabel('Time')
ax.set_ylabel('Spread')
ax.set_title('PEP-KO Spread');
In [17]:
# Augmented Dickey-Fuller test on the in-sample spread, allowing at most one
# lag. The first two entries of the result are the test statistic and its
# p-value.
adf = statsmodels.tsa.stattools.adfuller(spread['in-sample'], maxlag=1)

# Format the string *inside* the print call. The previous form,
# `print('...').format(...)`, only worked with the Python 2 print statement;
# under Python 3 it calls `.format` on print's return value (None) and raises
# AttributeError. This form behaves the same on both.
print('ADF test statistics: {:.03f}'.format(adf[0]))
print('p-value: {:.03f}'.format(adf[1]))
In [18]:
# Reference levels for the spread: its in-sample mean and one standard
# deviation on either side.
spread_mean = spread['in-sample'].mean()
spread_std = spread['in-sample'].std()

spread.plot()
plt.axhline(spread_mean, ls='--', color='b')
for level in (spread_mean + spread_std, spread_mean - spread_std):
    plt.axhline(level, ls='--', color='y');
In [19]:
# Daily closing prices for the period that follows the in-sample window.
# NOTE(review): start_date here equals the in-sample end_date ('2008-08-01');
# if both bounds are inclusive that day appears in both frames -- confirm.
data_oos = get_pricing(
    symbols(tickers),
    start_date='2008-08-01',
    end_date='2010-01-01',
    fields='close_price',
    frequency='daily'
)

# Same relabelling as for the in-sample frame: ticker strings as column
# labels, 'Date' as the index name.
data_oos.columns = [asset.symbol for asset in data_oos.columns]
data_oos.index.name = 'Date'
In [20]:
# Extend the in-sample spread onto the union of both date ranges; dates that
# only exist out-of-sample get NaN in the 'in-sample' column.
combined_index = spread.index.union(data_oos.index)
spread_oos = spread.reindex(combined_index)
In [21]:
# Apply the coefficients estimated in-sample to the out-of-sample prices:
#     price(tickers[1]) - slope * price(tickers[0]) - intercept
# `beta` is a label-indexed Series ordered [slope, intercept]; `.iloc` makes
# the positional access explicit instead of relying on the deprecated
# positional fallback of `beta[0]` / `beta[1]`.
spread_oos['out-of-sample'] = data_oos[tickers[1]] - beta.iloc[0] * data_oos[tickers[0]] - beta.iloc[1]
In [22]:
# First rows of the combined frame (start of the in-sample date range).
spread_oos.head()
Out[22]:
In [23]:
# Last rows of the combined frame (end of the out-of-sample date range).
spread_oos.tail()
Out[23]:
In [24]:
# The reference levels are still taken from the in-sample spread only, so the
# out-of-sample curve is judged against bands it did not help to estimate.
in_sample_mean = spread['in-sample'].mean()
in_sample_std = spread['in-sample'].std()

spread_oos.plot()
plt.axhline(in_sample_mean, ls='--', color='b')
for level in (in_sample_mean + in_sample_std, in_sample_mean - in_sample_std):
    plt.axhline(level, ls='--', color='y');
In [25]:
# Stack the in-sample and out-of-sample prices into one frame.
# `pd.concat` replaces `DataFrame.append`, which was removed in pandas 2.0;
# the resulting frame is the same.
data_all = pd.concat([data, data_oos])

# Time-coloured scatter of the full history.
colors = np.linspace(0.1, 1, len(data_all))
sc = plt.scatter(data_all[tickers[0]], data_all[tickers[1]], s=50, c=colors,
                 cmap=plt.get_cmap('jet'), edgecolor='k', alpha=0.7, label='Price Data')

# Overlay the line fitted on the in-sample data only. The first column of X
# is the price regressor; `.iloc[0]` selects it by position explicitly,
# because `[0]` on a label-indexed Series relies on a deprecated fallback.
x_ends = [X.min().iloc[0], X.max().iloc[0]]
plt.plot(x_ends, y_fit, '--b', linewidth=3, label='OLS Fit')
plt.legend()

# Pin the colorbar ticks to up to ten evenly spaced rows and label each tick
# with that row's date, so labels and ticks always agree in number and
# position.
tick_rows = np.linspace(0, len(data_all) - 1, min(10, len(data_all))).astype(int)
cb = plt.colorbar(sc, ticks=colors[tick_rows])
cb.ax.set_yticklabels([str(stamp.date()) for stamp in data_all.index[tick_rows]])
plt.xlabel(tickers[0])
plt.ylabel(tickers[1]);
In [26]:
# Fetch a backtest result by its ID.
# NOTE(review): `get_backtest` is not defined or imported in this notebook;
# presumably it is provided by the hosting research environment -- confirm.
bt = get_backtest('59687abcabb3315736a24c90')
In [27]:
# Render the full tear sheet for the backtest currently bound to `bt`.
bt.create_full_tear_sheet()
In [28]:
# Fetch a second backtest by its ID; this rebinds `bt`, so the previous
# backtest object is no longer reachable under that name.
# NOTE(review): what distinguishes this run from the first one is not stated
# in the notebook -- consider documenting it.
bt = get_backtest('59687c3e6d859b532e83fc13')
In [29]:
# Render the full tear sheet for the backtest currently bound to `bt`.
bt.create_full_tear_sheet()